*******		REPLICATION FILES
*******		Climate Variability and Irregular Migration to the European Union 
*******		Global Environmental Change 
*******		Fabien Cottier and Idean Salehyan
*******		Replication: Appendix A.2: Summary statistics
*******		This version: April 2021

* note: stata packages required for running models
* - coefplot
* - crossfold
* - plottig
* - estout


* ---- session set-up ----
* start from an empty Stata session
clear all

* graph scheme used for the figures (plottig package, see note above);
* switch off -more- pauses so the script runs without interruption
set scheme plottig
set more off

* working directory: replace with the location of the replication folder
cd "path/to/replication/directory"

* read the data set (file data_quarter.dta), replacing anything in memory
use "data_quarter.dta", clear

* run the auxiliary risk script with its output suppressed
* (its contents are not part of this file)
quietly do scripts/script_rr_spei_082019

* duplicated records on the key (cowc, year, quarter):
*   1. list them in the log
*   2. store the number of surplus copies per record in _dupl
*   3. keep one record per key; -force- is needed because the copies are
*      only required to agree on the key, not on every variable
local panel_key cowc year quarter
duplicates list `panel_key'
duplicates tag  `panel_key', generate(_dupl)
duplicates drop `panel_key', force

* build a running quarter counter and declare the panel structure
sort cowc year quarter

* quarter is a string: extract its first run of digits as a number
generate quarter_int = real(regexs(1)) if regexm(quarter,"([0-9]+)")

* consecutive quarter index; quarter 1 of 2005 maps to 1
generate quarterS = 4*(year - 2005) + quarter_int

* identifiers and time variables first in the variable list
order cowc nationality year quarter quarter_int quarterS

* panel unit: cowc, time variable: quarterS
tsset cowc quarterS
 
* numeric, value-labelled copy of the string variable continent
encode continent, generate(contN)
* category 5 is set to missing
* NOTE(review): encode numbers categories in alphabetical order of the strings,
* so which continent carries code 5 depends on the data - confirm
replace contN = . if contN == 5


* generate lagged quarterly migration variables (1 to 4 quarters back)
*
* The lags are taken with time-series operators (L1. ... L4.) on the panel
* declared by -tsset cowc quarterS-. Unlike positional subscripts ([_n-k]),
* the operators look up the value k quarters earlier in quarterS: if a
* country has a gap in its series the lag is missing instead of silently
* picking up the previous available row, and the result does not depend on
* the current sort order of the data.
*
* Variable names and creation order are unchanged:
*   nmigrq_exbalk_1tl ... nmigrq_exbalk_4tl,
*   nmigrq_exbalk_ln_1tl ... nmigrq_exbalk_ln_4tl
* so the wildcard nmigrq_exbalk_ln_* used in the models below expands as before.
foreach v in nmigrq_exbalk nmigrq_exbalk_ln {
    forvalues k = 1/4 {
        gen `v'_`k'tl = L`k'.`v'
    }
}


* gen dummy variable for sample splitting: low agriculture (0) / high agriculture (1)
*
* The cut-off is the median of agriLab over the 2010 observations that enter
* the estimation sample of the fixed-effects model below.
qui xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* i.year if migr100_exbalk==1, ///
 fe vce(cluster cowc)

sum agriLab if e(sample) & year==2010, d
scalar agriMed=r(p50)

* Classify the in-sample 2010 observations. Stata treats a missing value as
* larger than any number, so observations with missing agriLab are excluded
* explicitly; otherwise they would be coded as "high agriculture".
gen agri_H = (agriLab > scalar(agriMed)) if e(sample) & year==2010 & !missing(agriLab)

* Carry each country's classification to all of its observations.
* The reference value is the one observed in the first quarter of 2010
* (quarterS == 21), located by its year/quarter rather than by row position:
* the 21st row of a country is 2010Q1 only when its series starts in 2005Q1
* and has no gaps.
gen agri_H_ref = agri_H if year==2010 & quarter_int==1
bysort cowc (quarterS): egen agri_H_ctry = max(agri_H_ref)
replace agri_H = agri_H_ctry if missing(agri_H)
drop agri_H_ref agri_H_ctry

* leave the data in panel order (cowc, quarterS)
sort cowc quarterS

* generate dummy variables for extreme weather, based on the distribution of
* wmean_speiy in the estimation sample of the model below:
*   spei_drought = 1 if wmean_speiy is at or below the 10th percentile
*   spei_hrain   = 1 if wmean_speiy is at or above the 90th percentile
* Both dummies are missing where wmean_speiy is missing.
qui xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_speiy ///
 i.year i.quarter_int if migr100_exbalk==1, ///
 fe vce(cluster cowc)

* compute both cut-offs once and store them in scalars right away, so the
* code below does not depend on r() still holding the centile results
centile wmean_speiy if e(sample), centile(10 90)
scalar spei_p10 = r(c_1)
scalar spei_p90 = r(c_2)

* !missing() also excludes extended missing values (.a-.z), which compare as
* larger than any number and would otherwise be flagged as heavy rain
gen spei_drought = (wmean_speiy <= scalar(spei_p10)) if !missing(wmean_speiy)
gen spei_hrain   = (wmean_speiy >= scalar(spei_p90)) if !missing(wmean_speiy)



******* Summary statistics



* Table A.2: Summary statistics

* migration per 100,000 inhabitants
* NOTE(review): the numerator is nmigrq_exbalk, consistent with the name of
* the rate and with the count reported in the row above it. The earlier
* version divided nmigrq by pop - confirm which series model S2 uses.
gen Rmigrq_exbalk=(nmigrq_exbalk/pop)*(10^5)

* fit the model only to define the estimation sample e(sample)
quietly xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_speiy i.year if migr100_exbalk==1, ///
 fe vce(cluster cowc)

* summary statistics restricted to that sample; estpost stores them in e()
* (this overwrites the xtreg results, including e(sample))
qui estpost summarize nmigrq_exbalk Rmigrq_exbalk /// 
 wmean_speiy wmean_spei_gs wmean_speiq mean_speiy ///
 stdw10ma_tempy stdw10ma_precipy if e(sample)

* print the table: N, mean, sd, min, max for each variable
esttab, cells("count mean sd min max") noobs replace ///
   varlabels(nmigrq_exbalk "N Migr" Rmigrq_exbalk "N Migr per 100K inhabitants (S2)" ///
   wmean_speiy "SPEI, pop weighted" wmean_spei_gs "SPEI, pop weighted, growing season (S1)" ///
   wmean_speiq "SPEI, pop weighted, quarterly (S5)" mean_speiy "SPEI, no weight (S7)" ///
   stdw10ma_tempy "Temp anomalies(S9)" stdw10ma_precipy "Precip anomalies (S9)" )




* Table A.3: correlation matrix

* re-fit the model so that e(sample) marks its estimation sample
qui xtreg nmigrq_exbalk_ln nmigrq_exbalk_ln_* wmean_speiy i.year ///
 if migr100_exbalk==1, fe vce(cluster cowc)

* correlation between wmean_speiy and its 4th and 8th lags
* (in units of quarterS) within that sample
correlate L0.wmean_speiy L4.wmean_speiy L8.wmean_speiy if e(sample)


* Figure A.2: kernel density plot wmean_speiy
* e(sample) is still the one set by the xtreg call above
* (correlate does not replace the e() results)
kdensity wmean_speiy if e(sample)
 